************************************************************************************************
* This command file contains several important functions used throughout the replication code. *
************************************************************************************************
* 1. Spatial demand model estimator for total output
* 2. Market-level consumer surplus calculator
* 3. Per-firm profit calculator for a single output
* 4. Per-firm profit calculator for a multi-product firm

******************************************************
* 1. Spatial demand model estimator for total output *
******************************************************
clear mata
set more off
set seed 1234
set trace off

//COMPONENT 1: COMMAND SHELL IN STATA
//This creates a shell for the command that helps Stata recognize it as an estimation command (eclass)
program define spatialdemand_logQ, eclass sortpreserve
    version 14
//Section 1: Defining the syntax of the program and splitting the data into dependent variables and explanatory variables, as well as other auxiliary data (such as market size).
    syntax varlist(numeric ts fv) [if] [in] [, noCONStant]
    marksample touse //The variable which indicates whether an observation is used for the regression is called "touse".
    tempname b V N rank df_r X //We generate a couple of temporary names for our beta variables (b), the variance (V), the number of observations (N), the rank (rank), the degrees of freedom (df_r) and the explanatory variables (X).
	gettoken firmid remains1 : varlist //We take the first variable given to the command and call it "firmid" because it corresponds to the firm ID.
    gettoken depvar remains2 : remains1 //The second variable in the command contains the dependent variable (i.e. the measure of demand).
    gettoken marketid remains3 : remains2 //The third variable in the command contains the market ID.
    gettoken marketsize cnames : remains3 //The fourth variable measures market size. The remaining variables are referred to as cnames, which is the name of the object containing the control variables for which there will be estimated parameters.

//Section 2: Calling the mata program (this is defined below)
    mata: mywork_logQ("`varlist'", "`touse'", "`constant'", "`b'", "`V'", "`N'", "`rank'", "`df_r'") //This calls up the optimizer function. We feed into it locals containing the names of the variables, which are taken from the syntax of the function.

//Section 3: Based on the output of the mata program, we can then store some information which is necessary for the post-estimation results.
    if "`constant'" == "" { //`constant' includes whatever is specified in the noconstant option. If this is empty, then we should include a constant.
    	local cnames `cnames' _cons nest //If the user has not specified that they don't want to have a constant, then we need to add a constant to the list on factor variables. Here we also add a reference to the nest parameter, which has no control variable of its own and is added to the back of the dataset, similarly to the constant.
    }

    matrix colnames `b' = `cnames' //We name the parameters in the beta vector to correspond to the variables.
    matrix colnames `V' = `cnames' //We similarly name the columns and rows of the variance matrix.
    matrix rownames `V' = `cnames'

	//This specifies what information will be used to summarize the results after the estimation is complete.
    ereturn post `b' `V', esample(`touse') 
    ereturn scalar N       = `N'
    ereturn scalar rank    = `rank'
    ereturn scalar df_r    = `df_r'
    ereturn local  cmd     "spatial demand regression"
	
    ereturn display
end

//COMPONENT 2: SPATIAL DEMAND ESTIMATES IN MATA
//Spatial demand function: this calculates the spatial demand for a given set of control variables (X) and parameters (beta).
mata:
 numeric matrix spdemand_log(numeric matrix X, numeric matrix beta) //Here we specify what kind of arguments we want to be fed into the function.
{
    numeric matrix demand //This defines the result as a numeric matrix called demand.
	nest_parameter=1/(1+exp(-beta[rows(beta)])) //The nest parameter is transformed in order to be strictly within the 0-1 interval.
	X=sort(X,3) //Sort the results by market ID. This is crucial, as it is necessary for the definition of groups.
	firm_utility=exp((X[,5..cols(X)]*beta[1..(rows(beta)-1)])/nest_parameter) //The utility derived (per capita) from a specific firm f in market i (nx notary-market combinations x 1).
 info = panelsetup(X, 3) //Each row of info corresponds to a group based on the market ID, i.e. it represents the observations for a given market i.
 //The first element of the row shows the observation id of the first element of the group, i.e. the first notary-market combination for market i.
 //The second element of the row shows the observation id of the last element of the group, i.e. the last notary-market combination for market i.
 n_markets   = rows(info) //The number of markets corresponds to the  number of rows in the info matrix.
 nest_multiplier= J(n_markets, 1, 0) 
 prob=J(rows(X),1,0) //this is vector with rows equal to the number of notary-market combinations. It will contain the probability that the consumers of a given market buy from a specific notary.
for(i=1; i<=n_markets; i++) { //i goes over the markets
     market_utility = panelsubmatrix(firm_utility,i,info) //Use the i-th row of the info matrix to cut out the group-specific rows of the utility matrix. This selects all firm utilities with-in a given market.
     nest_utility=sum(market_utility) //Sum all firm utilities with-in a given market.
	 nest_multiplier[i,1]=((nest_utility)^(nest_parameter-1))/(1+nest_utility^nest_parameter) //NOTE: this is not the probability that the consumer will purchase notary services, since we use "nest_parameter-1" in the exponential in the numerator. We do this to spare ourselves a division by the sum of all notary utilities later.
	 k0=info[i,1] //This is the observation number of the first element in the group (i.e. the first notary-market observation which belongs to market i).
	 k1=info[i,2] //This is the observation number of the last element in the group (i.e. the last notary-market observation which belongs to market i).
prob[k0..k1]=firm_utility[k0..k1]*nest_multiplier[i,1] //This is the probability that the consumers from market i purchase from a specific notary.
}
X_res=X,prob //Here we concatinate the probabilities to the vector of X. We rename the vector X_res, mainly to avoid overwriting the original matrix, which may be referred to by other code later on in the analysis or will be necessary if we loop over this again.
demand_per_market=X_res[,cols(X_res)]:*X_res[,4] //The probability is in the last column of X_res. This is multiplied by the market size (which is in the fourth column) to calculate the expected sales of a notary in a given market.
X_res=X_res,demand_per_market //Here we concatinate the expected sales per notary per market to the vector of X_res.
X_res=sort(X_res,1) //Sort the results by firm_ID.
info2 = panelsetup(X_res, 1) //Generate groups based on firm IDs (these are the markets served by that specific notary).
n_firms   = rows(info2) //Calculate the total number of notaries.
demand=J(n_firms,3,0) //Generate the demand matrix, which contains rows equivalent to a specific notary. The first column consists of the ID of the notary. The second colum contains the log estimated demand per notary. The third column contains the log observed demand per notary.
for(i=1; i<=n_firms; i++) { //i goes over the firms.
demand_not = panelsubmatrix(X_res[,cols(X_res)],i,info2) //demand_not is the portion of the vector of estimated demands per market that corresponds to notary i.
id_not = panelsubmatrix(X_res[,1],i,info2) //id_not is the portion of the vector of firm IDs (saved as the first column of matrix X) that contains the ID of notary i (these should be identical values).
real_demand_not = panelsubmatrix(X_res[,2],i,info2) //real_demand_not is the portion of the vector of sales that corresponds to the actual sales of notary i (again, the values should be identical across markets).

demand[i,1]=mean(id_not) //The first column of the demand matrix contains the IDs of the firms.
demand[i,2]=log(sum(demand_not)) //The second column of the demand matrix contains the log of the sum of market-level demands of the notary.
demand[i,3]=log(mean(real_demand_not)) //The third column contains the log  of the mean observed demand. Since we do not observe market-specific demand per notary, this is just the mean of a vector of identical demand values (at the notary level).
}
return(demand) //This specifies that the outcome of the function should be a matrix called demand.
}
end

//COMPONENT 3: OPTIMIZATION SETTINGS IN MATA
//This defines how the spatial demand will be optimized. 
mata:

void mywork_logQ( string scalar vars, 
             string scalar touse,   string scalar constant, 
	     string scalar bname,   string scalar Vname,     
	     string scalar nname,   string scalar rname,     
	     string scalar dfrname) //Here we take the variable names which are relevant for the estimation and store them as mata strings to use to refer to variables in the optimization.
{

    real vector b //The results of the estimation are a vector of beta parameters,
    real matrix X, V //a matrix X containing the explanatory variables and a matrix V containing the covariance matrix,
    real scalar n, k //and a scalar n measuring the number of observations and k measuring the number of control variables.

    X    = st_data(., vars, touse) //This selects the variables which were listed in the spatialdemand_logQ command in Stata.
    nx    = rows(X) //This calculates the number of observations (market-notary combinations).
    if (constant == "") { //This states that a constant should be added.
        X    = X,J(nx,1,1) //Here a constant vector is concatenated to X, making the last observation in X a unit vector.
    }

//Define the rules for the optimization:
S  = optimize_init() //S is going to contain all of the relevant information for optimization.
optimize_init_argument(S, 1, X) //Enter the data which will be used for optimization.
optimize_init_technique(S,"bfgs") //Define the optimization technique.
rseed(1234) //Set the seed
optimize_init_conv_maxiter(S,500) //Set the maximum number of iterations.
optimize_init_evaluator(S, &demandeval_log()) //The evaluator function is called demandeval_log (and is defined below). Store this information into the object S, which contains the address of the revelant information for the evaluator.
//By preceding the name of the evaluator function demandeval_log() with an ampersand (&), I put the address of the evaluator function into S. optimize() requires that you put the function address instead of the function name because having the address speeds up finding the function.
//Set the initial values.
initial_values=(-.0189848,     .06117361,    -.00112983,    -.04467321,     12.348962,      8.045473,     1.4449715,    -.16024572,    -19.950343,    -2.1351975)
optimize_init_params(S, initial_values) 
b = optimize(S) //Beta will be defined by optimizing S.
b=b' //Transpose the results to get a row vector.
V    = optimize_result_V_oim(S) //Variance matrix
k = cols(X) - 3 - diag0cnt(invsym(V)) //Rank of the variance matrix
n = rows(uniqrows(X[,1])) //Number of observations.

    st_matrix(bname, b') //We export the results back into Stata, with the matrix bname containing the estimates from b. Similarly for the other variables.
    st_matrix(Vname, V)
    st_numscalar(nname, n)
    st_numscalar(rname, k)
    st_numscalar(dfrname, n-k)

}
end

//COMPONENT 4: FUNCTION TO BE OPTIMIZED IN MATA.
//The evaluator maximizes, which is why we calculate the negative of the sum of squared errors.
mata:
void demandeval_log(real scalar todo, real vector b, real matrix X, val, grad, hess)
 {
    real matrix demand
    demand = spdemand_log(X,b') //The spdemand_log command generates a demand matrix in which the first column contains the IDs of the firms, the second column contains the log of predicted demand and the third column contains the log of the observed demand.
//This is the negative of the loss function (i.e. the difference between the log of observed demand and the log of predicted demand, squared).
    val = -sum((demand[,3]-demand[,2]):^2)
 }
end

***********************************************
* 2. Market-level consumer surplus calculator *
***********************************************

//This function returns a matrix in which the first column contains the IDs of the markets and the second column contains the market-level consumer surplus.
//Make sure to sort the observations by market before putting the dataset into the matrix X.
//The vector beta corresponds to the estimates from the demand estimation.
//The numeric scalar transportation costs is pre-defined by the user.
mata:
 numeric matrix consumer_surplus(numeric matrix X, numeric matrix beta, numeric scalar transportation) //Here we specify what kind of arguments we want to be fed into the function.
//Additionally, we say that this function actually generates a matrix, rather than defining it as a general function.
{
    numeric matrix consumer_surplus
	nest_parameter=1/(1+exp(-beta[rows(beta)])) //The nest parameter needs to be transformed, since it was generated in a way which ensures values within the 0-1 interval.
		X=sort(X,(3,1)) //Sort the results by market ID. This is crucial, as it is necessary for the definition of groups.
firm_utility=exp((X[,5..cols(X)]*beta[1..(rows(beta)-1)])/nest_parameter) //The utility derived (per capita) from a specific firm f in market i (nx notary-market combinations x 1).
 info = panelsetup(X, 3) //Each row of info corresponds to a group based on the market ID, i.e. it represents the observations for a given market i.
 //The first element of the row shows the observation id of the first element of the group, i.e. the first notary-market combination for market i.
 //The second element of the row shows the observation id of the last element of the group, i.e. the last notary-market combination for market i.
 n_markets   = rows(info) //Number of markets
 consumer_surplus_aux=J(n_markets,3,0) //This is vector with rows equal to the number of markets. It contains three empty (zero) columns.
 //The first column of consumer_surplus_aux will contain the id of the market.
 //The second column of consumer_surplus_aux will contain the surplus per consumer.
 //The third column will contain the market size, which scales the consumer surplus.
 for(i=1; i<=n_markets; i++) { //i goes over the markets
 market_utility = panelsubmatrix(firm_utility,i,info) //Use the i-th row of the info matrix to cut out the group-specific rows of the utility matrix. This selects all firms utilities with-in a given market.
 nest_utility=sum(market_utility) //Sum all firm utilities with-in a given market.
 distance_parameter=beta[1] //The first control variable is distance, hence beta_1 measures the utility cost of distance
 consumer_surplus_aux[i,2]=-(transportation/distance_parameter)*ln(1+nest_utility^nest_parameter) //This is the per-capita surplus from purchasing notary services.
 id_market = panelsubmatrix(X[,3],i,info) //The third column of X contains the ID of the market. We select the market variable for all observations from the market. This should result in a vector containing the same number.
 consumer_surplus_aux[i,1]=mean(id_market) //The mean ID should be the same as the ID, since there is no variation in market IDs within a given market.
 size_market = panelsubmatrix(X[,4],i,info) //The fourth column of X contains the size of the market. We select the market size variable for all observations from the market. This should result in a vector containing the same number. 
 consumer_surplus_aux[i,3]=mean(size_market) //The mean market size should be the same as the market size, since there is no variation in market population within a given market.
	 }
consumer_surplus=J(n_markets,2,0) //This is vector with rows equal to the number of markets.
consumer_surplus[,1]=consumer_surplus_aux[,1] //The first column of consumer_surplus contains market IDs.
consumer_surplus[,2]=consumer_surplus_aux[,2]:*consumer_surplus_aux[,3] //The second column contains the tract-level consumer surplus multiplied by market size (i.e. the total surplus in a market).
X=sort(X,(3,1)) //Sort the results by market ID and firm ID.
return(consumer_surplus)
}
end

*****************************************************
* 3. Per-firm profit calculator for a single output *
*****************************************************
mata:
 numeric matrix per_firm_profit_mult(numeric matrix X, numeric matrix beta, numeric scalar p_Q, numeric scalar nu, numeric scalar aL, numeric scalar phi, numeric scalar aM) //Here we specify the parameters necessary for the calculation.
{
    numeric matrix profit //This defines the result as a numeric matrix called profit.
	nest_parameter=1/(1+exp(-beta[rows(beta)])) //The nest parameter is transformed in order to be strictly within the 0-1 interval.
	X=sort(X,3) //Sort the results by market ID. This is crucial, as it is necessary for the definition of groups.
	firm_utility=exp((X[,5..cols(X)]*beta[1..(rows(beta)-1)])/nest_parameter) //The utility derived (per capita) from a specific firm f in market i (nx notary-market combinations x 1).
 info = panelsetup(X, 3) //Each row of info corresponds to a group based on the market ID, i.e. it represents the observations for a given market i.
 //The first element of the row shows the observation id of the first element of the group, i.e. the first notary-market combination for market i.
 //The second element of the row shows the observation id of the last element of the group, i.e. the last notary-market combination for market i.
 n_markets   = rows(info) //The number of markets corresponds to the  number of rows in the info matrix.
 nest_multiplier= J(n_markets, 1, 0) 
 prob=J(rows(X),1,0) //This is a vector with rows equal to the number of notary-market combinations. It will contain the probability that the consumers of a given market buys from a specific notary.
for(i=1; i<=n_markets; i++) { //i goes over the markets
     market_utility = panelsubmatrix(firm_utility,i,info) //Use the i-th row of the info matrix to cut out the group-specific rows of the utility matrix. This selects all firm utilities with-in a given market.
     nest_utility=sum(market_utility) //Sum all firm utilities with-in a given market.
	 nest_multiplier[i,1]=((nest_utility)^(nest_parameter-1))/(1+nest_utility^nest_parameter) //NOTE: this is not the probability that the consumer will purchase notary services, since we use "nest_parameter-1" in the exponential in the numerator. We do this to spare ourselves a division by the sum of all notary utilities later.
	 k0=info[i,1] //This is the observation number of the first element in the group (i.e. the first notary-market observation which belongs to market i).
	 k1=info[i,2] //This is the observation number of the last element in the group (i.e. the last notary-market observation which belongs to market i).
prob[k0..k1]=firm_utility[k0..k1]*nest_multiplier[i,1] //This is the probability that the consumers from market i purchase from a specific notary.
}
X_res=X,prob //Here we concatinate the probabilities to the vector of X. We rename the vector X_res, mainly to avoid overwriting the original matrix, which may be referred to by other code later on in the analysis or will be necessary if we loop over this again.
demand_per_market=X_res[,cols(X_res)]:*X_res[,4] //The probability is in the last column of X_res. This is multiplied by the market size (which is in the fourth column) to calculate the expected sales of a notary in a given market.
X_res=X_res,demand_per_market //Here we concatinate the expected sales per notary per market to the vector of X_res.
X_res=sort(X_res,(1,3)) //Sort the results by firm_ID.
info2 = panelsetup(X_res, 1) //Generate groups based on firm IDs (these are the markets served by that specific notary).
n_firms   = rows(info2) //Calculate the total number of notaries.
demand=J(n_firms,3,0) //Generate the demand matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the log estimated demand per notary. The third column contains the log observed demand per notary.
cost=J(n_firms,2,0) //Generate the cost matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the costs of the notary.
revenue=J(n_firms,2,0) //Generate the revenue matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the revenue of the notary.
profit=J(n_firms,2,0) //Generate the profit matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the profits of the notary.
for(i=1; i<=n_firms; i++) { //i goes over the firms.
demand_not = panelsubmatrix(X_res[,cols(X_res)],i,info2) //demand_not is the portion of the vector of estimated demands per market that corresponds to notary i.
id_not = panelsubmatrix(X_res[,1],i,info2) //id_not is the portion of the vector of firm IDs (saved as the first column of matrix X) that contains the ID of notary i (these should be identical values).
real_demand_not = panelsubmatrix(X_res[,2],i,info2) //real_demand_not is the portion of the vector of sales that corresponds to the actual sales of notary i (again, the values should be identical across markets).
demand[i,1]=mean(id_not) //The first column of the demand matrix contains the IDs of the firms.
demand[i,2]=log(sum(demand_not)) //The second column of the demand matrix contains the log of the sum of market-level demands of the notary.
demand[i,3]=log(mean(real_demand_not)) //The third column contains the log  of the mean observed demand. Since we do not observe market specific demand per notary, this is just the mean of a vector of identical demand values (at the notary level).
cost[i,1]=demand[i,1] //This is the ID of the firm
cost[i,2]=exp(-aL)*(exp(demand[i,2])^(1/nu))+exp(-aM/phi)*(exp(demand[i,2])^(1/(nu*phi))) //DEMAND IS IN LOGS, so it contains lnQ_IV
revenue[i,1]=demand[i,1] //This is the ID of the firm
revenue[i,2]=p_Q*exp(demand[i,2]) //Price times quantity.
profit[i,1]=demand[i,1] /*Firm ID*/
profit[i,2]=revenue[i,2]-cost[i,2] /*Variable profit*/
X=sort(X,(3,1)) //Sort the results by market ID and firm ID.

}
return(profit) //This specifies that the outcome of the function should be a matrix called profit.
}
end

**********************************************************
* 4. Per-firm profit calculator for a multi-product firm *
**********************************************************

mata:
 numeric matrix per_firm_profit_mult_v2(numeric matrix X_Q1, numeric matrix X_Q2, numeric matrix beta_Q1, numeric matrix beta_Q2, numeric scalar p_Q1, numeric scalar p_Q2, numeric scalar nu, numeric scalar aL, numeric scalar phi, numeric scalar aM, numeric scalar alpha) //Here we specify what kind of arguments are used in the function.
{
    numeric matrix profit //This defines the result as a numeric matrix called profit.
log_demand_Q1=spdemand_log(X_Q1,beta_Q1) //Calculate log demand for real-estate transactions.
X_Q1=sort(X_Q1,(3,1)) //Sort the results by market ID and firm ID.

log_demand_Q2=spdemand_log(X_Q2,beta_Q2) //Calculate log demand for other transactions.
X_Q2=sort(X_Q2,(3,1)) //Sort the results by market ID and firm ID.

n_firms   = rows(log_demand_Q1) //Calculate the total number of notaries.
cost=J(n_firms,2,0) //Generate the cost matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the costs of the notary.
revenue=J(n_firms,2,0) //Generate the revenue matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the revenue of the notary.
profit=J(n_firms,2,0) //Generate the profit matrix, which contains a row for each notary. The first column consists of the ID of the notary. The second colum contains the profits of the notary.
cost[,1]=log_demand_Q1[,1] //This is the ID of the firm
cost[,2]=exp(-aL):*(alpha:*exp(log_demand_Q1[,2])+(1-alpha):*exp(log_demand_Q2[,2])):^(1/nu)+exp(-aM/phi):*(alpha:*exp(log_demand_Q1[,2])+(1-alpha):*exp(log_demand_Q2[,2])):^(1/(nu*phi))
revenue[,1]=log_demand_Q1[,1] //This is the ID of the firm
revenue[,2]=p_Q1*exp(log_demand_Q1[,2])+p_Q2*exp(log_demand_Q2[,2]) //Price times quantity.
profit[,1]=log_demand_Q1[,1] /*Firm ID*/
profit[,2]=revenue[,2]-cost[,2] /*Variable profit*/
return(profit) //This specifies that the outcome of the function should be a matrix called profit.
}
end
